Consider the following two data sets.
# load packages
library(tidyverse)
# initiate data frame on persons personal spending
df_c <- data.frame(id = c(1:3,1:3),
money_spent= c(1000, 2000, 6000, 1500, 3000, 5500),
currency = c("CHF", "CHF", "USD", "EUR", "CHF", "USD"),
year=c(2017,2017,2017,2018,2018,2018))
df_c## id money_spent currency year
## 1 1 1000 CHF 2017
## 2 2 2000 CHF 2017
## 3 3 6000 USD 2017
## 4 1 1500 EUR 2018
## 5 2 3000 CHF 2018
## 6 3 5500 USD 2018
# initiate data frame on persons' characteristics
df_p <- data.frame(id = 1:4,
first_name = c("Anna", "Betty", "Claire", "Diane"),
profession = c("Economist", "Data Scientist", "Data Scientist", "Economist"))
df_p## id first_name profession
## 1 1 Anna Economist
## 2 2 Betty Data Scientist
## 3 3 Claire Data Scientist
## 4 4 Diane Economist
df_c contains information on person’s consumption
spending, df_p information on these person’s personal
characteristics. We can merge/join the two data frames via a unique
person identifier in column id. There are four basic ways
of joining the two datasets when not all of the observations are
complete:
df_merged <- merge(df_p, df_c, by="id")
df_merged## id first_name profession money_spent currency year
## 1 1 Anna Economist 1000 CHF 2017
## 2 1 Anna Economist 1500 EUR 2018
## 3 2 Betty Data Scientist 2000 CHF 2017
## 4 2 Betty Data Scientist 3000 CHF 2018
## 5 3 Claire Data Scientist 6000 USD 2017
## 6 3 Claire Data Scientist 5500 USD 2018
NAs.df_merged <- merge(df_p, df_c, by="id", all=TRUE)
df_merged## id first_name profession money_spent currency year
## 1 1 Anna Economist 1000 CHF 2017
## 2 1 Anna Economist 1500 EUR 2018
## 3 2 Betty Data Scientist 2000 CHF 2017
## 4 2 Betty Data Scientist 3000 CHF 2018
## 5 3 Claire Data Scientist 6000 USD 2017
## 6 3 Claire Data Scientist 5500 USD 2018
## 7 4 Diane Economist NA <NA> NA
NAs.df_merged <- merge(df_p, df_c, by="id", all.x = TRUE, all.y = FALSE)
df_merged## id first_name profession money_spent currency year
## 1 1 Anna Economist 1000 CHF 2017
## 2 1 Anna Economist 1500 EUR 2018
## 3 2 Betty Data Scientist 2000 CHF 2017
## 4 2 Betty Data Scientist 3000 CHF 2018
## 5 3 Claire Data Scientist 6000 USD 2017
## 6 3 Claire Data Scientist 5500 USD 2018
## 7 4 Diane Economist NA <NA> NA
NAs.df_merged <- merge(df_p, df_c, by="id", all.x = FALSE, all.y = TRUE)
df_merged## id first_name profession money_spent currency year
## 1 1 Anna Economist 1000 CHF 2017
## 2 1 Anna Economist 1500 EUR 2018
## 3 2 Betty Data Scientist 2000 CHF 2017
## 4 2 Betty Data Scientist 3000 CHF 2018
## 5 3 Claire Data Scientist 6000 USD 2017
## 6 3 Claire Data Scientist 5500 USD 2018